setwd("/Users/ab6415/Documents/Work_laptop_stuff/Bioinformatics/Analysis/EpiMALs_PAPER_ANALYSES_Dryad/08_TE_analysis_M1/")

library(ggplot2)
library(MASS)
library(viridis)
## Loading required package: viridisLite
library(msir)
## Package 'msir' version 1.3.2
## Type 'citation("msir")' for citing this R package in publications.
library(gplots)
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
library(RColorBrewer)
library(ggpubr)
library(OneR)


overall_sd<-function(row){
sqsum=0
ncomps=0
for (i in seq(length(row))){
  for (j in seq(length(row))){
    if (i>j){
    sqsum=sqsum+(row[j]-row[i])**2
    ncomps=ncomps+1
    }}} 
return(sqrt(sqsum/ncomps))
}


technical_sd<-function(row){
  sqsum=0
  ncomps=0
  for (i in seq(1,21,2)){
    sqsum=sqsum+(row[i+1]-row[i])**2
    ncomps=ncomps+1
  }
  return(sqrt(sqsum/ncomps))
}


#density
get_density <- function(x, y, ...) {
  dens <- MASS::kde2d(x, y, ...)
  ix <- findInterval(x, dens$x)
  iy <- findInterval(y, dens$y)
  ii <- cbind(ix, iy)
  return(dens$z[ii])
}



loess_fit_and_plotres<-function(dat,thr_type,thr,return_all,plot1_filename,plot2_filename){

dat<-dat[which(dat$log10_mean>0),]

#fit technical cv2 data
l<-loess.sd(x = dat$log10_mean,y = dat$log10_tech_cv2, nsigma = 1.96)

l_fit<-data.frame(x=l$x,y=l$y,sd=l$sd,upper=l$upper,lower=l$lower,ID=dat$ID)
l_fit$Z<-(dat$log10_overall_cv2-l_fit$y)/l_fit$sd
l_fit$pvalue<-pnorm(l_fit$Z,lower.tail = FALSE)

l_fit_padj<-l_fit[which(l_fit$x>0.9999999),]
print(nrow(l_fit_padj))
l_fit_padj$padj<-p.adjust(l_fit_padj$pvalue,method = "fdr")
l_fit<-merge(l_fit,l_fit_padj[,c("padj","ID")],by="ID",all.x=TRUE)
l_fit<-l_fit[order(l_fit$x),]

dat$ID<-rownames(dat)
dat<-merge(dat,l_fit[,c("x","y","upper","lower","Z","pvalue","padj","ID")],by="ID",all.x=TRUE)
rownames(dat)<-dat$ID; dat$ID<-NULL

if (thr_type=="fdr"){
  dat$sig<-rep(0,nrow(dat))
  dat[which(dat$padj<thr),"sig"]<-1
}

else if (thr_type=="pval"){
  dat$sig<-rep(0,nrow(dat))
  dat[which(dat$pvalue<thr),"sig"]<-1
}

dat[which(dat$log10_mean<1),"sig"]<-0


if (return_all==FALSE){

ggplot(dat)+geom_histogram(aes(x=pvalue))
ggplot(dat)+geom_histogram(aes(x=padj))

print(ggplot(dat)+geom_point(aes(dat$log10_mean,dat$log10_overall_cv2),color=alpha("#8da0cb",0.2))+ scale_color_viridis()+geom_line(aes(x=l_fit$x,y=l_fit$lower),color="black",linetype="dashed")+geom_line(aes(x=l_fit$x,y=l_fit$y),color="black")+geom_line(aes(x=l_fit$x,y=l_fit$upper),color="black",linetype="dashed")+geom_point(data=subset(dat,sig==1),aes(dat[which(dat$sig==1),"log10_mean"],dat[which(dat$sig==1),"log10_overall_cv2"]),color=alpha("#66c2a5",0.4))+ylab("log10 cv2")+xlab("log10 mean")+theme_classic())
ggsave(filename = plot1_filename,dpi=150)

dat$density<-get_density(dat$log10_mean,dat$log10_overall_cv2, n = 100)
print(ggplot(dat)+geom_point(aes(dat$log10_mean,dat$log10_overall_cv2,color=density))+ scale_color_viridis()+geom_line(aes(x=l_fit$x,y=l_fit$lower),color="pink",linetype="dashed")+geom_line(aes(x=l_fit$x,y=l_fit$y),color="pink")+geom_line(aes(x=l_fit$x,y=l_fit$upper),color="pink",linetype="dashed")+geom_point(data=subset(dat,sig==1),aes(dat[which(dat$sig==1),"log10_mean"],dat[which(dat$sig==1),"log10_overall_cv2"]),color=alpha("red",0.4))+ylab("log10 cv2")+xlab("log10 mean")+theme_classic())
ggsave(filename = plot2_filename,dpi=150)

}

dat<-dat[order(dat$Z,decreasing=TRUE),]

if (return_all==TRUE){return(dat)}else{return(dat[which(dat$sig==1),])}

}
ttg_DEseqnorm<-read.table("22G_counts/final_counts_table/all_counts_M1_deseqnorm.txt")

cor(log2(ttg_DEseqnorm[,c("C1_25","C2_25","C3_25","J1_25","J2_25","J3_25","PMA1","PMA2","PMA3")]+1))
##           C1_25     C2_25     C3_25     J1_25     J2_25     J3_25      PMA1
## C1_25 1.0000000 0.9497794 0.9561141 0.9620830 0.9554805 0.9560675 0.9588046
## C2_25 0.9497794 1.0000000 0.9557167 0.9616697 0.9564927 0.9563783 0.9568058
## C3_25 0.9561141 0.9557167 1.0000000 0.9705552 0.9639155 0.9636595 0.9653024
## J1_25 0.9620830 0.9616697 0.9705552 1.0000000 0.9748389 0.9758627 0.9772710
## J2_25 0.9554805 0.9564927 0.9639155 0.9748389 1.0000000 0.9712616 0.9693165
## J3_25 0.9560675 0.9563783 0.9636595 0.9758627 0.9712616 1.0000000 0.9695412
## PMA1  0.9588046 0.9568058 0.9653024 0.9772710 0.9693165 0.9695412 1.0000000
## PMA2  0.9546277 0.9529882 0.9623223 0.9737105 0.9660412 0.9663385 0.9710508
## PMA3  0.9600219 0.9593510 0.9677783 0.9805176 0.9727127 0.9729771 0.9778337
##            PMA2      PMA3
## C1_25 0.9546277 0.9600219
## C2_25 0.9529882 0.9593510
## C3_25 0.9623223 0.9677783
## J1_25 0.9737105 0.9805176
## J2_25 0.9660412 0.9727127
## J3_25 0.9663385 0.9729771
## PMA1  0.9710508 0.9778337
## PMA2  1.0000000 0.9745883
## PMA3  0.9745883 1.0000000
colnames(ttg_DEseqnorm)
##  [1] "PMA1"   "PMA2"   "PMA3"   "A1_25"  "A2_25"  "B1_25"  "B2_25"  "C1_25" 
##  [9] "C2_25"  "C3_25"  "D1_25"  "D2_25"  "F1_25"  "F2_25"  "G1_25"  "G2_25" 
## [17] "H1_25"  "H2_25"  "I1_25"  "I2_25"  "J1_25"  "J2_25"  "J3_25"  "K1_25" 
## [25] "K2_25"  "L1_25"  "L2_25"  "A1_100" "A2_100" "B1_100" "B2_100" "C1_100"
## [33] "C2_100" "D1_100" "D2_100" "F1_100" "F2_100" "G1_100" "G2_100" "H1_100"
## [41] "H2_100" "I1_100" "I2_100" "J1_100" "J2_100" "K1_100" "K2_100" "L1_100"
## [49] "L2_100" "A1"     "A2"     "A3"     "A4"     "A5"     "A6"     "A8"    
## [57] "A9"     "A10"    "A11"    "A12"    "A13"    "B1"     "B2"     "B3"    
## [65] "B4"     "B5"     "B6"     "B8"     "B9"     "B10"    "B11"    "B12"   
## [73] "B13"
ttg_DEseqnorm_25<-ttg_DEseqnorm[,c(4:8,10:21,23:27)]
ttg_DEseqnorm_100<-ttg_DEseqnorm[,c(28:49)]
dat_25<-data.frame(mean=apply(ttg_DEseqnorm_25,MARGIN = 1,FUN=mean),
                     sd=apply(ttg_DEseqnorm_25,MARGIN = 1,FUN=sd),
                     overall_sd=apply(ttg_DEseqnorm_25,MARGIN = 1,FUN=overall_sd),
                     tech_sd=apply(ttg_DEseqnorm_25,MARGIN = 1,FUN=technical_sd),
                     ID=rownames(ttg_DEseqnorm_25),
                     max=apply(ttg_DEseqnorm_25,MARGIN = 1,FUN = max))
dat_25<-dat_25[which(dat_25$mean>0),]

#calculate log10cv2 values
dat_25$log10_overall_cv2<-log10((dat_25$overall_sd/dat_25$mean)**2)
dat_25$log10_tech_cv2<-log10((dat_25$tech_sd/dat_25$mean)**2)
dat_25$log10_mean<-log10(dat_25$mean+1)
dat_25$log10_cv2<-log10((dat_25$sd/dat_25$mean)**2)
dat_25$ff<-(dat_25$sd**2)/dat_25$mean


dat_25<-dat_25[order(dat_25$log10_mean,decreasing = FALSE),]


#plot
dat_25$density<-get_density(dat_25$log10_mean,dat_25$log10_overall_cv2, n = 100)
ggplot(dat_25)+geom_point(aes(log10_mean,log10_overall_cv2,color=density))+ scale_color_viridis()+ylim(-3,2)+ggtitle("gen 25, overall cv2")

dat_25$density<-get_density(dat_25$log10_mean,dat_25$log10_tech_cv2, n = 100)
ggplot(dat_25)+geom_point(aes(log10_mean,log10_tech_cv2,color=density))+ scale_color_viridis()+ylim(-3,2)+ggtitle("gen 25, technical cv2")
## Warning: Removed 2 rows containing missing values (geom_point).

gen25_fdr0.1<-loess_fit_and_plotres(dat_25,"fdr",0.1,return_all = FALSE,"gen25_technoise_fit_fdr0.1.pdf","gen25_technoise_fit_fdr0.1_density.pdf")
## [1] 9952
## Warning: Use of `dat$log10_mean` is discouraged. Use `log10_mean` instead.
## Warning: Use of `dat$log10_overall_cv2` is discouraged. Use `log10_overall_cv2`
## instead.
## Saving 7 x 5 in image
## Warning: Use of `dat$log10_mean` is discouraged. Use `log10_mean` instead.

## Warning: Use of `dat$log10_overall_cv2` is discouraged. Use `log10_overall_cv2`
## instead.

## Warning: Use of `dat$log10_mean` is discouraged. Use `log10_mean` instead.

## Warning: Use of `dat$log10_overall_cv2` is discouraged. Use `log10_overall_cv2`
## instead.
## Saving 7 x 5 in image
## Warning: Use of `dat$log10_mean` is discouraged. Use `log10_mean` instead.

## Warning: Use of `dat$log10_overall_cv2` is discouraged. Use `log10_overall_cv2`
## instead.

nrow(gen25_fdr0.1)
## [1] 753
write.table(rownames(gen25_fdr0.1),file="gen_25_HV22Gs_fdr0.1.txt",quote=FALSE,row.names = FALSE)
write.table(rownames(dat_25[which(dat_25$log10_mean>1),]),file="gen_25_HV22Gs_BACKGROUND_LIST.txt",quote=FALSE,row.names = FALSE)
dat_100<-data.frame(mean=apply(ttg_DEseqnorm_100,MARGIN = 1,FUN=mean),
                     sd=apply(ttg_DEseqnorm_100,MARGIN = 1,FUN=sd),
                     overall_sd=apply(ttg_DEseqnorm_100,MARGIN = 1,FUN=overall_sd),
                     tech_sd=apply(ttg_DEseqnorm_100,MARGIN = 1,FUN=technical_sd),
                     ID=rownames(ttg_DEseqnorm_100),
                     max=apply(ttg_DEseqnorm_100,MARGIN = 1,FUN = max))
dat_100<-dat_100[which(dat_100$mean>0),]

#calculate log10cv2 values
dat_100$log10_overall_cv2<-log10((dat_100$overall_sd/dat_100$mean)**2)
dat_100$log10_tech_cv2<-log10((dat_100$tech_sd/dat_100$mean)**2)
dat_100$log10_mean<-log10(dat_100$mean+1)
dat_100$log10_cv2<-log10((dat_100$sd/dat_100$mean)**2)
dat_100$ff<-(dat_100$sd**2)/dat_100$mean


dat_100<-dat_100[order(dat_100$log10_mean,decreasing = FALSE),]


#plot
dat_100$density<-get_density(dat_100$log10_mean,dat_100$log10_overall_cv2, n = 100)
ggplot(dat_100)+geom_point(aes(log10_mean,log10_overall_cv2,color=density))+ scale_color_viridis()+ylim(-3,2)+ggtitle("gen 100, overall cv2")

dat_100$density<-get_density(dat_100$log10_mean,dat_100$log10_tech_cv2, n = 100)
ggplot(dat_100)+geom_point(aes(log10_mean,log10_tech_cv2,color=density))+ scale_color_viridis()+ylim(-3,2)+ggtitle("gen 100, technical cv2")
## Warning: Removed 9 rows containing missing values (geom_point).

gen100_fdr1eminus4<-loess_fit_and_plotres(dat_100,"fdr",1e-4,return_all = FALSE,"gen100_technoise_fit_fdr1Eminus4.pdf","gen100_technoise_fit_fdr1Eminus4_density.pdf")
## [1] 10117
## Warning: Use of `dat$log10_mean` is discouraged. Use `log10_mean` instead.
## Warning: Use of `dat$log10_overall_cv2` is discouraged. Use `log10_overall_cv2`
## instead.
## Saving 7 x 5 in image
## Warning: Use of `dat$log10_mean` is discouraged. Use `log10_mean` instead.

## Warning: Use of `dat$log10_overall_cv2` is discouraged. Use `log10_overall_cv2`
## instead.

## Warning: Use of `dat$log10_mean` is discouraged. Use `log10_mean` instead.

## Warning: Use of `dat$log10_overall_cv2` is discouraged. Use `log10_overall_cv2`
## instead.
## Saving 7 x 5 in image
## Warning: Use of `dat$log10_mean` is discouraged. Use `log10_mean` instead.

## Warning: Use of `dat$log10_overall_cv2` is discouraged. Use `log10_overall_cv2`
## instead.

nrow(gen100_fdr1eminus4)
## [1] 1384
write.table(rownames(gen100_fdr1eminus4),file="gen_100_HV22Gs_fdr1e-4.txt",quote=FALSE,row.names = FALSE)

write.table(rownames(dat_100[which(dat_100$log10_mean>1),]),file="gen_100_HV22Gs_BACKGROUND_LIST.txt",quote=FALSE,row.names = FALSE)
gen25_noise_data<-loess_fit_and_plotres(dat_25,"fdr",0.05,return_all = TRUE)
## [1] 9952
gen100_noise_data<-loess_fit_and_plotres(dat_100,"fdr",1e-4,return_all = TRUE)
## [1] 10117
piRNA_targets<-read.table("../07_Gene_Sets/piRNA_targets_2fold"); piRNA_targets<-piRNA_targets$x
piRNA_targets_4x<-read.table("../07_Gene_Sets/piRNA_targets_4fold"); piRNA_targets_4x<-piRNA_targets_4x$x
CSR1_targets<-read.table("../07_Gene_Sets/CSR1_targets"); CSR1_targets<-CSR1_targets$x
HRDE1_targets<-read.table("../07_Gene_Sets/WAGO9_targets"); HRDE1_targets<-HRDE1_targets$x
WAGO1_targets<-read.table("../07_Gene_Sets/WAGO1_targets"); WAGO1_targets<-WAGO1_targets$x

gen25_noise_data$piRNA<-factor(rep(0,nrow(gen25_noise_data)),levels = c(0,1)); gen25_noise_data[which(rownames(gen25_noise_data) %in% piRNA_targets),"piRNA"]<-1
gen100_noise_data$piRNA<-factor(rep(0,nrow(gen100_noise_data)),levels = c(0,1)); gen100_noise_data[which(rownames(gen100_noise_data) %in% piRNA_targets),"piRNA"]<-1

gen25_noise_data$CSR1<-factor(rep(0,nrow(gen25_noise_data)),levels = c(0,1)); gen25_noise_data[which(rownames(gen25_noise_data) %in% CSR1_targets),"CSR1"]<-1
gen100_noise_data$CSR1<-factor(rep(0,nrow(gen100_noise_data)),levels = c(0,1)); gen100_noise_data[which(rownames(gen100_noise_data) %in% CSR1_targets),"CSR1"]<-1

gen25_noise_data$HRDE1<-factor(rep(0,nrow(gen25_noise_data)),levels = c(0,1)); gen25_noise_data[which(rownames(gen25_noise_data) %in% HRDE1_targets),"HRDE1"]<-1
gen100_noise_data$HRDE1<-factor(rep(0,nrow(gen100_noise_data)),levels = c(0,1)); gen100_noise_data[which(rownames(gen100_noise_data) %in% HRDE1_targets),"HRDE1"]<-1

gen25_noise_data$WAGO1<-factor(rep(0,nrow(gen25_noise_data)),levels = c(0,1)); gen25_noise_data[which(rownames(gen25_noise_data) %in% WAGO1_targets),"WAGO1"]<-1
gen100_noise_data$WAGO1<-factor(rep(0,nrow(gen100_noise_data)),levels = c(0,1)); gen100_noise_data[which(rownames(gen100_noise_data) %in% WAGO1_targets),"WAGO1"]<-1

TEs<-read.table("TE_cov0.8_all_txs.txt"); TEs<-TEs$V2
gen25_noise_data$TE<-factor(rep(0,nrow(gen25_noise_data)),levels = c(0,1)); gen25_noise_data[which(rownames(gen25_noise_data) %in% TEs),"TE"]<-1
gen100_noise_data$TE<-factor(rep(0,nrow(gen100_noise_data)),levels = c(0,1)); gen100_noise_data[which(rownames(gen100_noise_data) %in% TEs),"TE"]<-1


#plot gen 25 data
ggplot(gen25_noise_data)+geom_point(aes(x=log10_mean,y=log10_overall_cv2,color=CSR1))+
  color_palette(c(alpha("lightblue",0.2),alpha("red",0.5)))+
  theme_classic()+
  geom_line(aes(x=x,y=y))+geom_line(aes(x=x,y=lower),linetype="dashed")+geom_line(aes(x=x,y=upper),linetype="dashed")

ggsave("gen_25_CSR1_targets.pdf",dpi=150)
## Saving 7 x 5 in image
ggplot(gen25_noise_data)+geom_point(aes(x=log10_mean,y=log10_overall_cv2,color=HRDE1))+
  color_palette(c(alpha("lightblue",0.2),alpha("red",0.5)))+
  theme_classic()+
  geom_line(aes(x=x,y=y))+geom_line(aes(x=x,y=lower),linetype="dashed")+geom_line(aes(x=x,y=upper),linetype="dashed")

ggsave("gen_25_HRDE1_targets.pdf",dpi=150)
## Saving 7 x 5 in image
ggplot(gen25_noise_data)+geom_point(aes(x=log10_mean,y=log10_overall_cv2,color=piRNA))+
  color_palette(c(alpha("lightblue",0.2),alpha("red",0.5)))+
  theme_classic()+
  geom_line(aes(x=x,y=y))+geom_line(aes(x=x,y=lower),linetype="dashed")+geom_line(aes(x=x,y=upper),linetype="dashed")

ggsave("gen_25_piRNA_targets.pdf",dpi=150)
## Saving 7 x 5 in image
ggplot(gen25_noise_data)+geom_point(aes(x=log10_mean,y=log10_overall_cv2,color=WAGO1))+
  color_palette(c(alpha("lightblue",0.2),alpha("red",0.5)))+
  theme_classic()+
  geom_line(aes(x=x,y=y))+geom_line(aes(x=x,y=lower),linetype="dashed")+geom_line(aes(x=x,y=upper),linetype="dashed")

ggsave("gen_25_WAGO1_targets.pdf",dpi=150)
## Saving 7 x 5 in image
ggplot(gen25_noise_data)+geom_point(aes(x=log10_mean,y=log10_overall_cv2,color=TE))+
  color_palette(c(alpha("lightblue",0.2),alpha("red",0.5)))+
  theme_classic()+
  geom_line(aes(x=x,y=y))+geom_line(aes(x=x,y=lower),linetype="dashed")+geom_line(aes(x=x,y=upper),linetype="dashed")

ggsave("gen_25_TEs.pdf",dpi=150)
## Saving 7 x 5 in image
ggplot(gen25_noise_data)+geom_point(aes(x=log10_mean,y=log10_overall_cv2,color=TE))+
  color_palette(c(alpha("lightblue",0.2),alpha("red",0.5)))+
  theme_classic()+
  geom_line(aes(x=x,y=y))+geom_line(aes(x=x,y=lower),linetype="dashed")+geom_line(aes(x=x,y=upper),linetype="dashed")

ggsave("gen_25_TEs.png",dpi=300,device = "png")
## Saving 7 x 5 in image
pdf("gen_25_Zscores.pdf")
boxplot(gen25_noise_data[which(gen25_noise_data$piRNA==1),"Z"],
        gen25_noise_data[which(gen25_noise_data$HRDE1==1),"Z"],
        gen25_noise_data[which(gen25_noise_data$WAGO1==1),"Z"],
        gen25_noise_data[which(gen25_noise_data$CSR1==1),"Z"],
        gen25_noise_data[which(gen25_noise_data$TE==1),"Z"],
        names=c("piRNA","HRDE-1","WAGO-1","CSR-1","TEs"),outline=FALSE,las=2,cex=0.5,ylab="Z-score")
dev.off()
## quartz_off_screen 
##                 2
#plot gen 100 data
ggplot(gen100_noise_data)+geom_point(aes(x=log10_mean,y=log10_overall_cv2,color=CSR1))+
    color_palette(c(alpha("lightblue",0.2),alpha("red",0.5)))+
    theme_classic()+
    geom_line(aes(x=x,y=y))+geom_line(aes(x=x,y=lower),linetype="dashed")+geom_line(aes(x=x,y=upper),linetype="dashed")

ggsave("gen_100_CSR1_targets.pdf",dpi=150)
## Saving 7 x 5 in image
ggplot(gen100_noise_data)+geom_point(aes(x=log10_mean,y=log10_overall_cv2,color=HRDE1))+
    color_palette(c(alpha("lightblue",0.2),alpha("red",0.5)))+
    theme_classic()+
    geom_line(aes(x=x,y=y))+geom_line(aes(x=x,y=lower),linetype="dashed")+geom_line(aes(x=x,y=upper),linetype="dashed")

ggsave("gen_100_HRDE1_targets.pdf",dpi=150)
## Saving 7 x 5 in image
ggplot(gen100_noise_data)+geom_point(aes(x=log10_mean,y=log10_overall_cv2,color=piRNA))+
    color_palette(c(alpha("lightblue",0.2),alpha("red",0.5)))+
    theme_classic()+
    geom_line(aes(x=x,y=y))+geom_line(aes(x=x,y=lower),linetype="dashed")+geom_line(aes(x=x,y=upper),linetype="dashed")

ggsave("gen_100_piRNA_targets.pdf",dpi=150)
## Saving 7 x 5 in image
ggplot(gen100_noise_data)+geom_point(aes(x=log10_mean,y=log10_overall_cv2,color=WAGO1))+
    color_palette(c(alpha("lightblue",0.2),alpha("red",0.5)))+
    theme_classic()+
    geom_line(aes(x=x,y=y))+geom_line(aes(x=x,y=lower),linetype="dashed")+geom_line(aes(x=x,y=upper),linetype="dashed")

ggsave("gen_100_WAGO1_targets.pdf",dpi=150)
## Saving 7 x 5 in image
ggplot(gen100_noise_data)+geom_point(aes(x=log10_mean,y=log10_overall_cv2,color=TE))+
    color_palette(c(alpha("lightblue",0.2),alpha("red",0.5)))+
    theme_classic()+
    geom_line(aes(x=x,y=y))+geom_line(aes(x=x,y=lower),linetype="dashed")+geom_line(aes(x=x,y=upper),linetype="dashed")

ggsave("gen_100_TEs.pdf",dpi=150)
## Saving 7 x 5 in image
ggplot(gen100_noise_data)+geom_point(aes(x=log10_mean,y=log10_overall_cv2,color=TE))+
    color_palette(c(alpha("lightblue",0.2),alpha("red",0.5)))+
    theme_classic()+
    geom_line(aes(x=x,y=y))+geom_line(aes(x=x,y=lower),linetype="dashed")+geom_line(aes(x=x,y=upper),linetype="dashed")

ggsave("gen_100_TEs.png",dpi=300,device = "png")
## Saving 7 x 5 in image
pdf("gen_100_Zscores.pdf")
boxplot(gen100_noise_data[which(gen100_noise_data$piRNA==1),"Z"],
        gen100_noise_data[which(gen100_noise_data$HRDE1==1),"Z"],
        gen100_noise_data[which(gen100_noise_data$WAGO1==1),"Z"],
        gen100_noise_data[which(gen100_noise_data$CSR1==1),"Z"],
        gen100_noise_data[which(gen100_noise_data$TE==1),"Z"],
        names=c("piRNA","HRDE-1","WAGO-1","CSR-1","TEs"),outline=FALSE,las=2,cex=0.5,ylab="Z-score")
dev.off()
## quartz_off_screen 
##                 2
#Fano Factor with averaged data

average_datasets<-function(df){
  df_averaged<-data.frame(
    a=(df[,1]+df[,2])/2,
    b=(df[,3]+df[,4])/2,
    c=(df[,5]+df[,6])/2,
    d=(df[,7]+df[,8])/2,
    f=(df[,9]+df[,10])/2,
    g=(df[,11]+df[,12])/2,
    h=(df[,13]+df[,14])/2,
    i=(df[,15]+df[,16])/2,
    j=(df[,17]+df[,18])/2,
    k=(df[,19]+df[,20])/2,
    l=(df[,21]+df[,22])/2)
  
  rownames(df_averaged)<-rownames(df)
  return(df_averaged)
}

ttg_DEseqnorm_25_avg<-average_datasets(ttg_DEseqnorm_25)
ttg_DEseqnorm_100_avg<-average_datasets(ttg_DEseqnorm_100)

dat_25_avg<-data.frame(mean=apply(ttg_DEseqnorm_25_avg,MARGIN = 1,FUN=mean),
                       sd=apply(ttg_DEseqnorm_25_avg,MARGIN = 1,FUN=sd))
dat_25_avg$ff<-((dat_25_avg$sd)**2)/dat_25_avg$mean

dat_100_avg<-data.frame(mean=apply(ttg_DEseqnorm_100_avg,MARGIN = 1,FUN=mean),
                       sd=apply(ttg_DEseqnorm_100_avg,MARGIN = 1,FUN=sd))
dat_100_avg$ff<-((dat_100_avg$sd)**2)/dat_100_avg$mean

pdf("gen_25_ffs.pdf")
boxplot(dat_25_avg[which(rownames(dat_25_avg) %in% piRNA_targets),"ff"],
        dat_25_avg[which(rownames(dat_25_avg) %in% HRDE1_targets),"ff"],
        dat_25_avg[which(rownames(dat_25_avg) %in% WAGO1_targets),"ff"],
        dat_25_avg[which(rownames(dat_25_avg) %in% CSR1_targets),"ff"],
        dat_25_avg[which(rownames(dat_25_avg) %in% TEs),"ff"],
        names=c("piRNA","HRDE-1","WAGO-1","CSR-1","TEs"),outline=FALSE,las=2,cex=0.5,ylab="Fano factor")
dev.off()
## quartz_off_screen 
##                 2
pdf("gen_100_ffs.pdf")
boxplot(dat_100_avg[which(rownames(dat_100_avg) %in% piRNA_targets),"ff"],
        dat_100_avg[which(rownames(dat_100_avg) %in% HRDE1_targets),"ff"],
        dat_100_avg[which(rownames(dat_100_avg) %in% WAGO1_targets),"ff"],
        dat_100_avg[which(rownames(dat_100_avg) %in% CSR1_targets),"ff"],
        dat_100_avg[which(rownames(dat_100_avg) %in% TEs),"ff"],
        names=c("piRNA","HRDE-1","WAGO-1","CSR-1","TEs"),outline=FALSE,las=2,cex=0.5,ylab="Fano factor")
dev.off()
## quartz_off_screen 
##                 2
gen25_ffdata<-data.frame(ffs=c(dat_25_avg[which(rownames(dat_25_avg) %in% piRNA_targets),"ff"],
        dat_25_avg[which(rownames(dat_25_avg) %in% HRDE1_targets),"ff"],
        dat_25_avg[which(rownames(dat_25_avg) %in% WAGO1_targets),"ff"],
        dat_25_avg[which(rownames(dat_25_avg) %in% CSR1_targets),"ff"],
        dat_25_avg[which(rownames(dat_25_avg) %in% TEs),"ff"]),
                         gene_class=factor(c(rep("piRNA",length(dat_25_avg[which(rownames(dat_25_avg) %in% piRNA_targets),"ff"])),
                                      rep("HRDE-1",length(dat_25_avg[which(rownames(dat_25_avg) %in% HRDE1_targets),"ff"])),
                                      rep("WAGO-1",length(dat_25_avg[which(rownames(dat_25_avg) %in% WAGO1_targets),"ff"])),
                                      rep("CSR-1",length(dat_25_avg[which(rownames(dat_25_avg) %in% CSR1_targets),"ff"])),
                                      rep("TEs",length(dat_25_avg[which(rownames(dat_25_avg) %in% TEs),"ff"])))))
gen25_ffdata$gene_class<-factor(gen25_ffdata$gene_class,levels = c("piRNA","HRDE-1","WAGO-1","CSR-1","TEs"))
ggplot(gen25_ffdata)+
  geom_boxplot(aes(y=ffs,x=gene_class))+
  theme_classic()+
  ylim(0,50)
## Warning: Removed 584 rows containing non-finite values (stat_boxplot).

gen100_ffdata<-data.frame(ffs=c(dat_100_avg[which(rownames(dat_100_avg) %in% piRNA_targets),"ff"],
        dat_100_avg[which(rownames(dat_100_avg) %in% HRDE1_targets),"ff"],
        dat_100_avg[which(rownames(dat_100_avg) %in% WAGO1_targets),"ff"],
        dat_100_avg[which(rownames(dat_100_avg) %in% CSR1_targets),"ff"],
        dat_100_avg[which(rownames(dat_100_avg) %in% TEs),"ff"]),
                         gene_class=factor(c(rep("piRNA",length(dat_100_avg[which(rownames(dat_100_avg) %in% piRNA_targets),"ff"])),
                                      rep("HRDE-1",length(dat_100_avg[which(rownames(dat_100_avg) %in% HRDE1_targets),"ff"])),
                                      rep("WAGO-1",length(dat_100_avg[which(rownames(dat_100_avg) %in% WAGO1_targets),"ff"])),
                                      rep("CSR-1",length(dat_100_avg[which(rownames(dat_100_avg) %in% CSR1_targets),"ff"])),
                                      rep("TEs",length(dat_100_avg[which(rownames(dat_100_avg) %in% TEs),"ff"])))))
gen100_ffdata$gene_class<-factor(gen100_ffdata$gene_class,levels = c("piRNA","HRDE-1","WAGO-1","CSR-1","TEs"))
ggplot(gen100_ffdata)+
  geom_boxplot(aes(y=ffs,x=gene_class))+
  theme_classic()+
  ylim(0,50)
## Warning: Removed 1467 rows containing non-finite values (stat_boxplot).

dat_25_avg<-dat_25_avg[which(dat_25_avg$mean>10),]
dat_100_avg<-dat_100_avg[which(dat_100_avg$mean>10),]

boxplot(dat_25_avg[which(rownames(dat_25_avg) %in% piRNA_targets),"ff"],
        dat_25_avg[which(rownames(dat_25_avg) %in% HRDE1_targets),"ff"],
        dat_25_avg[which(rownames(dat_25_avg) %in% WAGO1_targets),"ff"],
        dat_25_avg[which(rownames(dat_25_avg) %in% CSR1_targets),"ff"],
        dat_25_avg[which(rownames(dat_25_avg) %in% TEs),"ff"],
        names=c("piRNA","HRDE-1","WAGO-1","CSR-1","TEs"),outline=FALSE,las=2,cex=0.5,ylab="Fano factor")

boxplot(dat_100_avg[which(rownames(dat_100_avg) %in% piRNA_targets),"ff"],
        dat_100_avg[which(rownames(dat_100_avg) %in% HRDE1_targets),"ff"],
        dat_100_avg[which(rownames(dat_100_avg) %in% WAGO1_targets),"ff"],
        dat_100_avg[which(rownames(dat_100_avg) %in% CSR1_targets),"ff"],
        dat_100_avg[which(rownames(dat_100_avg) %in% TEs),"ff"],
        names=c("piRNA","HRDE-1","WAGO-1","CSR-1","TEs"),outline=FALSE,las=2,cex=0.5,ylab="Fano factor")

gen25_ffdata<-data.frame(ffs=c(dat_25_avg[which(rownames(dat_25_avg) %in% piRNA_targets),"ff"],
        dat_25_avg[which(rownames(dat_25_avg) %in% HRDE1_targets),"ff"],
        dat_25_avg[which(rownames(dat_25_avg) %in% WAGO1_targets),"ff"],
        dat_25_avg[which(rownames(dat_25_avg) %in% CSR1_targets),"ff"],
        dat_25_avg[which(rownames(dat_25_avg) %in% TEs),"ff"]),
                         gene_class=factor(c(rep("piRNA",length(dat_25_avg[which(rownames(dat_25_avg) %in% piRNA_targets),"ff"])),
                                      rep("HRDE-1",length(dat_25_avg[which(rownames(dat_25_avg) %in% HRDE1_targets),"ff"])),
                                      rep("WAGO-1",length(dat_25_avg[which(rownames(dat_25_avg) %in% WAGO1_targets),"ff"])),
                                      rep("CSR-1",length(dat_25_avg[which(rownames(dat_25_avg) %in% CSR1_targets),"ff"])),
                                      rep("TEs",length(dat_25_avg[which(rownames(dat_25_avg) %in% TEs),"ff"])))))
gen25_ffdata$gene_class<-factor(gen25_ffdata$gene_class,levels = c("piRNA","HRDE-1","WAGO-1","CSR-1","TEs"))
ggplot(gen25_ffdata)+
  geom_boxplot(aes(y=ffs,x=gene_class))+
  theme_classic()+
  coord_cartesian(ylim=c(0,50))

gen100_ffdata<-data.frame(ffs=c(dat_100_avg[which(rownames(dat_100_avg) %in% piRNA_targets),"ff"],
        dat_100_avg[which(rownames(dat_100_avg) %in% HRDE1_targets),"ff"],
        dat_100_avg[which(rownames(dat_100_avg) %in% WAGO1_targets),"ff"],
        dat_100_avg[which(rownames(dat_100_avg) %in% CSR1_targets),"ff"],
        dat_100_avg[which(rownames(dat_100_avg) %in% TEs),"ff"]),
                         gene_class=factor(c(rep("piRNA",length(dat_100_avg[which(rownames(dat_100_avg) %in% piRNA_targets),"ff"])),
                                      rep("HRDE-1",length(dat_100_avg[which(rownames(dat_100_avg) %in% HRDE1_targets),"ff"])),
                                      rep("WAGO-1",length(dat_100_avg[which(rownames(dat_100_avg) %in% WAGO1_targets),"ff"])),
                                      rep("CSR-1",length(dat_100_avg[which(rownames(dat_100_avg) %in% CSR1_targets),"ff"])),
                                      rep("TEs",length(dat_100_avg[which(rownames(dat_100_avg) %in% TEs),"ff"])))))
gen100_ffdata$gene_class<-factor(gen100_ffdata$gene_class,levels = c("piRNA","HRDE-1","WAGO-1","CSR-1","TEs"))
ggplot(gen100_ffdata)+
  geom_boxplot(aes(y=ffs,x=gene_class))+
  theme_classic()+
  coord_cartesian(ylim=c(0,50))